import os
from collections import defaultdict
import torch
import numpy as np
import scipy.stats
from torch.distributions import constraints
from matplotlib import pyplot
import random
import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer.autoguide import AutoDelta
from pyro.optim import Adam
from pyro.infer import SVI, TraceEnum_ELBO, config_enumerate, infer_discrete
from pyro.ops.indexing import Vindex
from pyro.infer import MCMC, NUTS
## Data preparation and cleaning
# importing required packages
import pyreadr
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%autosave 30
import umap
import plotly
import plotly.graph_objs as go
import pickle
pyro.enable_validation(True)
# import data and convert to python objects
# load Rdata object produced upstream in R
rdata = pyreadr.read_r('tomtom_data/det_dfs.Rdata')
# pull out separate dataframes, one with choice text, the other with numeric responses
df = rdata['df']
dfn = rdata['dfn']
df.head()  # notebook remnant: displays the frame in a notebook, no-op as a script
# read in state labels and build '<state1>_<state2>' transition-pair names
states = pd.read_csv('tomtom_data/states.csv')
states['statepair'] = states['state1'] + '_' + states['state2']  # construct state pair strings
states.head()
# extract all the self ratings: 75 consecutive columns starting at X1_Q12_1
ind_selfq1 = df.columns.get_loc('X1_Q12_1') # location of the first question
trans_self = df.iloc[:,list(range(ind_selfq1,(ind_selfq1+75)))]
print('shape of self-rating dataframe: ',trans_self.shape)
trans_self.columns = states['statepair'].tolist() # rename transition columns with the corresponding transition pairs
# extract all the specific-target ratings: 75 consecutive columns starting at X1_Q9_1
ind_targq1 = df.columns.get_loc('X1_Q9_1') # location of the first question
trans_targ = df.iloc[:,list(range(ind_targq1,(ind_targq1+75)))]
print('shape of target-rating dataframe: ', trans_targ.shape)
trans_targ.columns = states['statepair'].tolist() # rename transition columns with the corresponding transition pairs
# extract all the group-level ratings: 75 consecutive columns starting at X1_Q11_1
ind_avgq1 = df.columns.get_loc('X1_Q11_1') # location of the first question
trans_avg = df.iloc[:,list(range(ind_avgq1,(ind_avgq1+75)))]
print('shape of group-rating dataframe: ', trans_avg.shape)
trans_avg.columns = states['statepair'].tolist() # rename transition columns with the corresponding transition pairs
# inspect the "spread" of target vs group ratings using pairwise correlation between subjects
# transpose first so .corr() correlates subjects (rows) rather than items (columns)
trans_self_corbtsub = trans_self.transpose().corr()
trans_targ_corbtsub = trans_targ.transpose().corr()
trans_avg_corbtsub = trans_avg.transpose().corr()
# get the mean pairwise correlation for each of the three sets of correlations
def mean_pairwise_corr(corbtsub):
    """Flatten the strict lower triangle of a square correlation matrix.

    Returns a tuple (flattened lower-triangle values, their NaN-ignoring mean).
    """
    mat = np.array(corbtsub)
    # strict lower triangle (k=-1): each subject pair counted exactly once
    rows, cols = np.tril_indices(mat.shape[0], -1)
    flat = mat[rows, cols]
    return flat, np.nanmean(flat)
# flatten each between-subject correlation matrix and take its mean
trans_self_pairwiseflat, trans_self_meanpairwise = mean_pairwise_corr(trans_self_corbtsub)
trans_targ_pairwiseflat, trans_targ_meanpairwise = mean_pairwise_corr(trans_targ_corbtsub)
trans_avg_pairwiseflat, trans_avg_meanpairwise = mean_pairwise_corr(trans_avg_corbtsub)
print('The spread of self ratings: ', trans_self_meanpairwise)
print('The spread of target ratings: ', trans_targ_meanpairwise)
print('The spread of group ratings: ', trans_avg_meanpairwise)
# paired t-test: do target ratings agree less across subjects than group ratings?
scipy.stats.ttest_rel(trans_targ_pairwiseflat,trans_avg_pairwiseflat, nan_policy='omit')
# Additional way to look at the spread of specific vs group: looking at the variance for each of the items
# the graph shows less spread at the item level for group ratings than for self or target specific ratings.
itemvar_trans_self = trans_self.var(axis = 0)
itemvar_trans_targ = trans_targ.var(axis = 0)
itemvar_trans_avg = trans_avg.var(axis = 0)
# NOTE(review): sns.distplot is deprecated (removed in seaborn >= 0.14);
# sns.histplot(..., kde=True) is the modern equivalent
sns.distplot(itemvar_trans_self, label='Self')
sns.distplot(itemvar_trans_targ, label='Target')
sns.distplot(itemvar_trans_avg, label='Group')
plt.legend()
plt.show()
# reusable pre-processing: turn one ratings DataFrame into pyro-ready tensors
def data_transform(trans):
    """Normalize transition ratings and stack them into 3-D torch tensors.

    Parameters
    ----------
    trans : pd.DataFrame
        One row per subject, 75 columns named '<from>_<to>', arranged in
        consecutive groups of 5 sharing a from-state; exactly one column per
        group is the autotransition (from == to).

    Returns
    -------
    tuple of four torch.Tensor, each (n_subjects, 15, ncol):
        - row-normalized probabilities incl. autotransitions (ncol=5)
        - row-normalized probabilities excl. autotransitions (ncol=4)
        - raw ratings / 100 incl. autotransitions (ncol=5)
        - raw ratings / 100 excl. autotransitions (ncol=4)

    Notes
    -----
    The original also built frequency-scaled copies (prob * 10000) and a
    "normed-with-auto then auto-dropped" view; none were returned or used,
    so they are dropped here.
    """
    # boolean mask of autotransition columns (from-state == to-state)
    cnsplit = [name.split('_') for name in trans.columns.tolist()]
    idx_autotransition = [parts[0] == parts[1] for parts in cnsplit]
    not_auto = [not flag for flag in idx_autotransition]
    # 1. normalize each from-state group of 5 columns so every row sums to 1
    t_norm_all = pd.DataFrame(columns=trans.columns, index=trans.index)
    for i in range(0, trans.shape[1], 5):
        dftemp = trans.iloc[:, i:(i + 5)]
        # DataFrame.div(axis=0) replaces the fragile Series[:, np.newaxis]
        # indexing of the original (unsupported by modern pandas);
        # .to_numpy() makes the iloc assignment purely positional
        normed = dftemp.div(dftemp.sum(axis=1), axis=0)
        t_norm_all.iloc[:, i:(i + 5)] = normed.to_numpy()
    # 2. normalize again with autotransitions dropped: groups of FOUR columns
    trans_noauto = trans.loc[:, not_auto]
    t_norm_noauto = pd.DataFrame(columns=trans_noauto.columns, index=trans_noauto.index)
    for i in range(0, trans_noauto.shape[1], 4):
        dftemp = trans_noauto.iloc[:, i:(i + 4)]
        normed = dftemp.div(dftemp.sum(axis=1), axis=0)
        # BUG FIX: original wrote into a 5-wide slice i:(i+5) here
        t_norm_noauto.iloc[:, i:(i + 4)] = normed.to_numpy()

    def _stack(frame, ncol):
        # one (15, ncol) matrix per subject, stacked into (n, 15, ncol)
        # BUG FIX: original used frame.iloc[0] for every subject, so all
        # subjects received the first subject's data
        return torch.tensor(
            np.stack([np.array(frame.iloc[i]).reshape(15, ncol)
                      for i in np.arange(frame.shape[0])]).astype('float32'))

    t_norm_all_3d = _stack(t_norm_all, 5)
    t_norm_noauto_3d = _stack(t_norm_noauto, 4)
    t_raw_all_3d = _stack(trans, 5)
    t_raw_noauto_3d = _stack(trans_noauto, 4)
    # raw ratings were on a 0-100 scale; rescale to 0-1 for the Beta model
    return t_norm_all_3d, t_norm_noauto_3d, t_raw_all_3d / 100, t_raw_noauto_3d / 100
# NOTE(review): `global` at module top level is a no-op; these lines only
# signal that the t*_3d names are looked up via globals() by tomtom_svi/mcmc
global tself_norm_all_3d, tself_norm_noauto_3d, tself_raw_all_3d, tself_raw_noauto_3d
global ttarg_norm_all_3d, ttarg_norm_noauto_3d, ttarg_raw_all_3d, ttarg_raw_noauto_3d
global tavg_norm_all_3d, tavg_norm_noauto_3d, tavg_raw_all_3d, tavg_raw_noauto_3d
# build all four tensor variants for each of the three rating types
tself_norm_all_3d, tself_norm_noauto_3d, tself_raw_all_3d, tself_raw_noauto_3d = data_transform(trans_self)
ttarg_norm_all_3d, ttarg_norm_noauto_3d, ttarg_raw_all_3d, ttarg_raw_noauto_3d = data_transform(trans_targ)
tavg_norm_all_3d, tavg_norm_noauto_3d, tavg_raw_all_3d, tavg_raw_noauto_3d = data_transform(trans_avg)
# Mixture model over subjects' transition-rating tensors.  Behavior is
# switched by module-level globals set before each run:
#   K    -- number of mixture components
#   auto -- 'all' (5 to-state columns) or 'noauto' (4, autotransitions dropped)
#   norm -- 'norm' (Dirichlet likelihood on row-normalized data) or
#           'raw'  (Beta likelihood on raw ratings rescaled to 0-1)
@config_enumerate()
def model(data):
    """Pyro mixture model; `data` is a (n_subjects, 15, ncol) tensor."""
    # Background probability of different groups (assume equally likely)
    weights = pyro.sample('weights', dist.Dirichlet(0.5 * torch.ones(K)))
    # declare number of columns based on whether auto-transitions are included
    if auto == 'all':
        ncol = 5
    elif auto == 'noauto':
        ncol = 4
    # declare model parameters based on whether the data are row-normalized
    if norm == 'norm':
        with pyro.plate('components', K):
            # per-component Dirichlet concentration, one value per grid cell
            concentration = pyro.sample('concentration',
                                        dist.Gamma(2 * torch.ones(15,ncol), 1/3 * torch.ones(15,ncol)).to_event(2))
        with pyro.plate('data', data.shape[0]):
            # discrete component label, enumerated out by TraceEnum_ELBO
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            # (an earlier version detached concentration here; .detach() would
            # cut the gradient path, so it is used directly)
            d = dist.Dirichlet(concentration[assignment,:,:])
            pyro.sample('obs', d.to_event(1), obs=data)
    elif norm == 'raw':
        with pyro.plate('components', K):
            # per-component Beta parameters for every cell of the 15 x ncol grid
            alphas = pyro.sample('alpha', dist.Gamma(2 * torch.ones(15,ncol), 1/3 * torch.ones(15,ncol)).to_event(2))
            betas = pyro.sample('beta', dist.Gamma(2 * torch.ones(15,ncol), 1/3 * torch.ones(15,ncol)).to_event(2))
        with pyro.plate('data', data.shape[0]):
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            d = dist.Beta(alphas[assignment,:,:], betas[assignment,:,:])
            pyro.sample('obs', d.to_event(2), obs=data)
# draw one random initialization of the model and return its initial ELBO
# loss; sets module globals `global_guide` and `svi` as a side effect
def initialize(seed, model, data):
    """Seed pyro, build an AutoDelta MAP guide, and return svi.loss."""
    global global_guide, svi
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()
    # expose only the global latent sites; the discrete `assignment` site is
    # enumerated out by the ELBO, so it is blocked from the guide
    if norm == 'norm':
        global_guide = AutoDelta(poutine.block(model, expose=['weights', 'concentration']))
    elif norm == 'raw':
        global_guide = AutoDelta(poutine.block(model, expose=['weights', 'alpha', 'beta']))
    # `optim` and `elbo` are module-level globals configured before each run
    svi = SVI(model, global_guide, optim, loss=elbo)
    return svi.loss(model, global_guide, data)
# run the SVI fit for the single model variant currently selected by the
# module globals (target, norm, auto); assumes optim/elbo are already set
def tomtom_svi():
    """Fit the mixture by SVI (MAP via AutoDelta) after a 100-seed search.

    Returns (best_seed, map_estimates): the winning init seed and a dict of
    MAP tensors from the fitted AutoDelta guide.
    """
    pyro.clear_param_store()
    # declare dataset to be modeled, e.g. 'tself_norm_all_3d'
    dtname = 't{}_{}_{}_3d'.format(target, norm, auto)
    print("running SVI with: {}".format(dtname))
    data = globals()[dtname]
    # try 100 seeds, keep the one with the lowest initial loss, then
    # re-initialize with it (initialize() rebuilds svi / global_guide)
    loss, seed = min((initialize(seed,model,data), seed) for seed in range(100))
    initialize(seed,model,data)
    print('seed = {}, initial_loss = {}'.format(seed, loss))
    # record per-parameter gradient norms for diagnostics
    # NOTE(review): gradient_norms is local and never returned, but the hooks
    # keep appending to it for as long as the parameters live
    gradient_norms = defaultdict(list)
    for name, value in pyro.get_param_store().named_parameters():
        value.register_hook(lambda g, name=name: gradient_norms[name].append(g.norm().item()))
    losses = []
    for i in range(3000):
        loss = svi.step(data)
        losses.append(loss)
        if i % 100 == 0:
            print('.',end = '')  # lightweight progress indicator
    print('\n final loss: {}\n'.format(losses[-1]))
    # MAP point estimates from the fitted guide
    map_estimates = global_guide(data)
    weights = map_estimates['weights']
    print('weights = {}'.format(weights.data.numpy()))
    if norm == 'norm':
        concentration = map_estimates['concentration']
        print('concentration = {}'.format(concentration.data.numpy()))
    elif norm == 'raw':
        alpha = map_estimates['alpha']
        print('alphas = {}'.format(alpha.data.numpy()))
        beta = map_estimates['beta']
        print('beta = {}'.format(beta.data.numpy()))
    return seed, map_estimates
def tomtom_mcmc(seed, nsample = 5000, burnin = 1000):
    """Run NUTS MCMC on the model variant selected by the module globals.

    seed is typically the best SVI seed; returns mcmc.get_samples(), a dict
    mapping latent site names to sample tensors.
    """
    pyro.clear_param_store()
    pyro.set_rng_seed(seed)
    # declare dataset to be modeled, looked up by name in module globals
    dtname = 't{}_{}_{}_3d'.format(target, norm, auto)
    print("running MCMC with: {}".format(dtname))
    data = globals()[dtname]
    nuts_kernel = NUTS(model)
    mcmc = MCMC(nuts_kernel, num_samples=nsample, warmup_steps=burnin)
    mcmc.run(data)
    posterior_samples = mcmc.get_samples()
    return posterior_samples
# restore previously saved MCMC posteriors for the four self-rating variants
# NOTE(review): `input` shadows the builtin of the same name inside this block
with open('tomtom_mcmc.pkl','rb') as input:
    [posterior_samples_self_norm_all,
     posterior_samples_self_norm_noauto,
     posterior_samples_self_raw_all,
     posterior_samples_self_raw_noauto] = pickle.load(input)
random.seed(200515)
# umap visualization
# rudimentary test run of umap on self transition data with default settings
fit_trans_self_default = umap.UMAP()
u_trans_self_default = fit_trans_self_default.fit_transform(trans_self)
plt.scatter(u_trans_self_default[:,0],u_trans_self_default[:,1])
# NOTE(review): the name says neighbor30 but n_neighbors is 15 here
fit_trans_self_neighbor30 = umap.UMAP(n_neighbors=15,min_dist=0.05)
u_trans_self_neighbor30 = fit_trans_self_neighbor30.fit_transform(trans_self)
plt.scatter(u_trans_self_neighbor30[:,0],u_trans_self_neighbor30[:,1])
# pca scree plot to gauge how many dimensions capture the variance
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaler = StandardScaler()
trans_self_scaled = scaler.fit_transform(trans_self)
decomp = PCA()
trans_self_pca = decomp.fit(trans_self_scaled)
plt.scatter(list(range(np.shape(trans_self_pca.explained_variance_ratio_)[0])),np.array(trans_self_pca.explained_variance_ratio_))
# above pca result suggests 4-6 factors
# construct a new umap object with 3 dimensions and visualize it to look for obvious clusters
fit_trans_self_3d = umap.UMAP(n_neighbors=47,min_dist=0,n_components=3)
u_trans_self_3d = fit_trans_self_3d.fit_transform(trans_self)
# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()
# 3-D scatter of the umap embedding
trace = go.Scatter3d(
    x=u_trans_self_3d[:,0],
    y=u_trans_self_3d[:,1],
    z=u_trans_self_3d[:,2],
    mode='markers',
    marker={
        'size': 10,
        'opacity': 0.8,
    }
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
data = [trace]
plot_figure = go.Figure(data=data, layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)
# --- self ratings, Dirichlet likelihood on normalized data, with autotransitions ---
K = 2
target = 'self' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_self_norm_all, map_self_norm_all = tomtom_svi()
posterior_samples_self_norm_all = tomtom_mcmc(seed_self_norm_all)
posterior_samples_self_norm_all['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_self_norm_all['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
# NOTE(review): title says "concentration" but X is the first mixture weight
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- self ratings, Dirichlet likelihood on normalized data, no autotransitions ---
K = 2
target = 'self' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_self_norm_noauto, map_self_norm_noauto = tomtom_svi()
posterior_samples_self_norm_noauto = tomtom_mcmc(seed_self_norm_noauto)
posterior_samples_self_norm_noauto['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_self_norm_noauto['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- self ratings, Beta likelihood on raw data, with autotransitions ---
K = 2
target = 'self' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_self_raw_all, map_self_raw_all = tomtom_svi()
posterior_samples_self_raw_all = tomtom_mcmc(seed_self_raw_all)
posterior_samples_self_raw_all['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_self_raw_all['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- self ratings, Beta likelihood on raw data, no autotransitions ---
K = 2
target = 'self' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_self_raw_noauto, map_self_raw_noauto = tomtom_svi()
posterior_samples_self_raw_noauto = tomtom_mcmc(seed_self_raw_noauto)
posterior_samples_self_raw_noauto['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_self_raw_noauto['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
random.seed(200506)
# umap visualization
# rudimentary test run of umap on target transition data with default settings
fit_trans_targ_default = umap.UMAP()
u_trans_targ_default = fit_trans_targ_default.fit_transform(trans_targ)
plt.scatter(u_trans_targ_default[:,0],u_trans_targ_default[:,1])
fit_trans_targ_neighbor30 = umap.UMAP(n_neighbors=30,min_dist=0.05)
u_trans_targ_neighbor30 = fit_trans_targ_neighbor30.fit_transform(trans_targ)
plt.scatter(u_trans_targ_neighbor30[:,0],u_trans_targ_neighbor30[:,1])
# pca scree plot to gauge how many dimensions capture the variance
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaler = StandardScaler()
trans_targ_scaled = scaler.fit_transform(trans_targ)
decomp = PCA()
trans_targ_pca = decomp.fit(trans_targ_scaled)
plt.scatter(list(range(np.shape(trans_targ_pca.explained_variance_ratio_)[0])),np.array(trans_targ_pca.explained_variance_ratio_))
# above pca result suggests 4-6 factors
# construct a new umap object with 3 dimensions and visualize it to look for obvious clusters
fit_trans_targ_3d = umap.UMAP(n_neighbors=47,min_dist=0,n_components=3)
u_trans_targ_3d = fit_trans_targ_3d.fit_transform(trans_targ)
# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()
# 3-D scatter of the umap embedding
trace = go.Scatter3d(
    x=u_trans_targ_3d[:,0],
    y=u_trans_targ_3d[:,1],
    z=u_trans_targ_3d[:,2],
    mode='markers',
    marker={
        'size': 10,
        'opacity': 0.8,
    }
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
data = [trace]
plot_figure = go.Figure(data=data, layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)
# --- target ratings, Dirichlet likelihood on normalized data, with autotransitions ---
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_targ_norm_all, map_targ_norm_all = tomtom_svi()
posterior_samples_targ_norm_all = tomtom_mcmc(seed_targ_norm_all)
posterior_samples_targ_norm_all['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_targ_norm_all['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
# NOTE(review): title says "concentration" but X is the first mixture weight
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- target ratings, Dirichlet likelihood on normalized data, no autotransitions ---
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_targ_norm_noauto, map_targ_norm_noauto = tomtom_svi()
posterior_samples_targ_norm_noauto = tomtom_mcmc(seed_targ_norm_noauto)
posterior_samples_targ_norm_noauto['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_targ_norm_noauto['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- target ratings, Beta likelihood on raw data, with autotransitions ---
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_targ_raw_all, map_targ_raw_all = tomtom_svi()
posterior_samples_targ_raw_all = tomtom_mcmc(seed_targ_raw_all)
posterior_samples_targ_raw_all['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_targ_raw_all['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- target ratings, Beta likelihood on raw data, no autotransitions ---
K = 2
target = 'targ' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_targ_raw_noauto, map_targ_raw_noauto = tomtom_svi()
posterior_samples_targ_raw_noauto = tomtom_mcmc(seed_targ_raw_noauto)
posterior_samples_targ_raw_noauto['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_targ_raw_noauto['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
random.seed(200506)
# umap visualization
# rudimentary test run of umap on group-average transition data with default settings
fit_trans_avg_default = umap.UMAP()
u_trans_avg_default = fit_trans_avg_default.fit_transform(trans_avg)
plt.scatter(u_trans_avg_default[:,0],u_trans_avg_default[:,1])
fit_trans_avg_neighbor30 = umap.UMAP(n_neighbors=30,min_dist=0.05)
u_trans_avg_neighbor30 = fit_trans_avg_neighbor30.fit_transform(trans_avg)
plt.scatter(u_trans_avg_neighbor30[:,0],u_trans_avg_neighbor30[:,1])
# pca scree plot to gauge how many dimensions capture the variance
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaler = StandardScaler()
trans_avg_scaled = scaler.fit_transform(trans_avg)
decomp = PCA()
trans_avg_pca = decomp.fit(trans_avg_scaled)
plt.scatter(list(range(np.shape(trans_avg_pca.explained_variance_ratio_)[0])),np.array(trans_avg_pca.explained_variance_ratio_))
# above pca result suggests 4-6 factors
# construct a new umap object with 3 dimensions and visualize it to look for obvious clusters
fit_trans_avg_3d = umap.UMAP(n_neighbors=47,min_dist=0,n_components=3)
u_trans_avg_3d = fit_trans_avg_3d.fit_transform(trans_avg)
# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()
# 3-D scatter of the umap embedding
trace = go.Scatter3d(
    x=u_trans_avg_3d[:,0],
    y=u_trans_avg_3d[:,1],
    z=u_trans_avg_3d[:,2],
    mode='markers',
    marker={
        'size': 10,
        'opacity': 0.8,
    }
)
# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
data = [trace]
plot_figure = go.Figure(data=data, layout=layout)
# Render the plot.
plotly.offline.iplot(plot_figure)
# --- group ratings, Dirichlet likelihood on normalized data, with autotransitions ---
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_avg_norm_all, map_avg_norm_all = tomtom_svi()
posterior_samples_avg_norm_all = tomtom_mcmc(seed_avg_norm_all)
posterior_samples_avg_norm_all['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_avg_norm_all['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
# NOTE(review): title says "concentration" but X is the first mixture weight
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- group ratings, Dirichlet likelihood on normalized data, no autotransitions ---
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'norm' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_avg_norm_noauto, map_avg_norm_noauto = tomtom_svi()
posterior_samples_avg_norm_noauto = tomtom_mcmc(seed_avg_norm_noauto)
posterior_samples_avg_norm_noauto['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_avg_norm_noauto['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- group ratings, Beta likelihood on raw data, with autotransitions ---
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'all' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_avg_raw_all, map_avg_raw_all = tomtom_svi()
posterior_samples_avg_raw_all = tomtom_mcmc(seed_avg_raw_all)
posterior_samples_avg_raw_all['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_avg_raw_all['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()
# --- group ratings, Beta likelihood on raw data, no autotransitions ---
K = 2
target = 'avg' # 'self','targ','avg'
norm = 'raw' # 'norm','raw'
auto = 'noauto' # 'noauto','all'
optim = pyro.optim.Adam({'lr': 0.0005, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
seed_avg_raw_noauto, map_avg_raw_noauto = tomtom_svi()
posterior_samples_avg_raw_noauto = tomtom_mcmc(seed_avg_raw_noauto)
posterior_samples_avg_raw_noauto['weights'].shape
# trace plot of the first mixture weight across NUTS samples
X = posterior_samples_avg_raw_noauto['weights'][:,0]
pyplot.plot(np.arange(X.numpy().size),X.numpy())
pyplot.title('Sampling trace of top left concentration with NUTS')
pyplot.ylabel('Sample value')
pyplot.xlabel('Step')
pyplot.tight_layout()